from IPython.core.display import display, HTML
# Widen the notebook's main container to 95% of the browser window.
notebook_css = "<style>.container { width:95% !important; }</style>"
display(HTML(notebook_css))
import numpy as np
import pandas as pd
import sklearn
import matplotlib.pyplot as plt
import plotly.express as px
# Load the headerless Musk1 table; columns are therefore labelled 0, 1, 2, ...
Musk = pd.read_csv("Musk1.csv", header=None)
Musk.head()
# Every column from position 2 onwards is kept as a feature
# (presumably columns 0 and 1 are the bag label and bag id -- see the later
# rename to BAGLABEL / BAGID).
Musk_Feat = Musk.iloc[:, 2:]
from sklearn.decomposition import PCA
from sklearn.manifold import MDS
from sklearn.preprocessing import scale
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
# Standardise the instance-level feature columns (everything after the first
# two columns) while keeping the original column labels.
Musk_Feat = Musk.iloc[:, 2:]
scaled_values = scale(Musk_Feat)
Musk_Feat = pd.DataFrame(scaled_values, columns=Musk_Feat.columns)

# Three-dimensional MDS embedding of the standardised features.
mds = MDS(n_components=3)
mds_coords = mds.fit_transform(Musk_Feat)
musk_mds = pd.DataFrame(mds_coords)

# Three-component PCA projection of the same features.
pca = PCA(n_components=3)
pca_scores = pca.fit_transform(Musk_Feat)
musk_pca = pd.DataFrame(pca_scores)
# Attach the first column of Musk (used as the label) to both embeddings.
# Both frames already have a column named 0, so join() has to suffix the
# overlapping name; presumably that is why the left column is addressed below
# as the string '0' and the joined one as "0_label".
instance_labels = Musk.iloc[:, 0]
musk_mds = musk_mds.join(instance_labels, rsuffix="_label")
musk_pca = musk_pca.join(instance_labels, rsuffix="_label")

# Give the coordinate and label columns readable names.
pca_column_names = {'0': "pc_1", 1: 'pc_2', 2: 'pc_3', "0_label": "label"}
mds_column_names = {'0': "latent_1", 1: 'latent_2', 2: 'latent_3', "0_label": "label"}
musk_pca = musk_pca.rename(columns=pca_column_names)
musk_mds = musk_mds.rename(columns=mds_column_names)

# Store labels as strings so plotly colours them as discrete categories.
for embedding in (musk_pca, musk_mds):
    embedding["label"] = embedding["label"].astype(str)

musk_pca.head()
musk_mds.head()
# Settings shared by both instance-level 3D scatter plots.
scatter_options = dict(color='label', width=800, height=800)

# PCA projection, coloured by label.
fig = px.scatter_3d(
    musk_pca,
    x="pc_1",
    y="pc_2",
    z="pc_3",
    title="PCA with 3 Components",
    **scatter_options,
)
fig.show()

# MDS embedding, coloured by label.
fig = px.scatter_3d(
    musk_mds,
    x="latent_1",
    y="latent_2",
    z="latent_3",
    title="MDS with 3 Latent Variables",
    **scatter_options,
)
fig.show()
# Collapse the instance-level table to one row per bag by averaging every
# remaining column within each BAGID.
Musk = Musk.rename(columns={0: "BAGLABEL", 1: "BAGID"})
Musk = Musk.groupby('BAGID').mean().reset_index(drop=True)
Musk.head()

# BAGID became the group index and was then discarded by
# reset_index(drop=True), so BAGLABEL is the only non-feature column left and
# it sits at position 0. The features therefore start at position 1; slicing
# from position 2 (as for the raw table) would silently drop the first feature.
Musk_Feat = Musk.iloc[:, 1:]
Musk_Feat = pd.DataFrame(scale(Musk_Feat), columns=Musk_Feat.columns)
# Three-dimensional MDS embedding of the standardised bag-level features.
mds = MDS(n_components=3)
bag_mds_coords = mds.fit_transform(Musk_Feat)
musk_mds = pd.DataFrame(bag_mds_coords)

# Three-component PCA projection of the same bag-level features.
pca = PCA(n_components=3)
bag_pca_scores = pca.fit_transform(Musk_Feat)
musk_pca = pd.DataFrame(bag_pca_scores)
# Attach the first column of the aggregated Musk table to both embeddings.
# The rename below expects that column to be called "BAGLABEL".
bag_labels = Musk.iloc[:, 0]
musk_mds = musk_mds.join(bag_labels, rsuffix="_label")
musk_pca = musk_pca.join(bag_labels, rsuffix="_label")

# Give the coordinate and label columns readable names.
pca_column_names = {0: "pc_1", 1: 'pc_2', 2: 'pc_3', "BAGLABEL": "label"}
mds_column_names = {0: "latent_1", 1: 'latent_2', 2: 'latent_3', "BAGLABEL": "label"}
musk_pca = musk_pca.rename(columns=pca_column_names)
musk_mds = musk_mds.rename(columns=mds_column_names)

# Store labels as strings so plotly colours them as discrete categories.
for embedding in (musk_pca, musk_mds):
    embedding["label"] = embedding["label"].astype(str)

musk_pca
# Settings shared by both bag-level 3D scatter plots.
scatter_options = dict(color='label', width=800, height=800)

# PCA projection of the bag-level data, coloured by label.
fig = px.scatter_3d(
    musk_pca,
    x="pc_1",
    y="pc_2",
    z="pc_3",
    title="PCA with 3 Components",
    **scatter_options,
)
fig.show()

# MDS embedding of the bag-level data, coloured by label.
fig = px.scatter_3d(
    musk_mds,
    x="latent_1",
    y="latent_2",
    z="latent_3",
    title="MDS with 3 Latent Variables",
    **scatter_options,
)
fig.show()
from statistics import mode
def most_common(x):
    """Return the most frequent value in ``x``, averaging ties.

    The previous implementation tried ``statistics.mode`` on an undefined
    name inside a bare ``except``, so the NameError was swallowed and the
    count-based fallback always ran. This version performs that count-based
    computation directly, so results are unchanged but real errors are no
    longer hidden.

    Parameters
    ----------
    x : sized iterable of numeric values (list, array, pandas Series, ...)
        Missing values (NaN) are ignored when counting.

    Returns
    -------
    float
        The single most frequent value, or the mean of all values tied for
        the highest count. NaN when ``x`` is empty or holds only missing
        values.
    """
    if len(x) == 0:
        return np.nan

    # Occurrences per distinct value; NaN entries are not counted.
    counts = pd.Series(x).value_counts()
    if counts.empty:
        return np.nan

    # Keep every value that reaches the maximum count and average them, so a
    # unique mode is returned as-is and ties resolve to their mean.
    tied_levels = counts.index[counts == counts.max()]
    return tied_levels.to_series().mean()
# Reload the raw table and reduce each bag to one row, this time taking the
# most common value of every column within the bag (see most_common).
Musk = pd.read_csv("Musk1.csv", header=None).rename(
    columns={0: "BAGLABEL", 1: "BAGID"}
)
Musk = Musk.groupby('BAGID').agg(most_common)
Musk.head()
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
# Read the photo from disk and display it.
image_path = 'Foto_256_256.jpg'
img = mpimg.imread(image_path)
plt.imshow(img)
from copy import deepcopy
# Pixel range of the clean image; it defines the bounds of the noise.
MinPixel = img.min()
MaxPixel = img.max()

# Add random integer noise drawn from [MinPixel, 10% of MaxPixel).
# NOTE(review): np.random.randint raises ValueError when low >= high, i.e.
# when the darkest pixel is not below 10% of the brightest -- confirm the
# lower bound is really meant to be the image minimum rather than 0.
noise = np.random.randint(MinPixel, (MaxPixel * 0.1), size=img.shape)
img_noise = img + noise

# max value cannot exceed 255
too_bright = img_noise >= 255
img_noise[too_bright] = 255
plt.imshow(img_noise)
# Build one copy of the noisy image per colour channel and blank out the other
# two channels (last-axis index 0 = red, 1 = green, 2 = blue, matching the
# variable names).
img_noise_red = img_noise.copy()
img_noise_red[:, :, [1, 2]] = 0

img_noise_green = img_noise.copy()
img_noise_green[:, :, [0, 2]] = 0

img_noise_blue = img_noise.copy()
img_noise_blue[:, :, [0, 1]] = 0
# Show the noisy image next to its three single-channel versions in a 2x2
# grid, filled row by row.
fig, ax = plt.subplots(ncols=2, nrows=2, figsize=(15, 15))
panels = [
    (img_noise, "Original"),
    (img_noise_red, "Red Channel"),
    (img_noise_green, "Green Channel"),
    (img_noise_blue, "Blue Channel"),
]
for axis, (picture, caption) in zip(ax.flat, panels):
    axis.imshow(picture)
    axis.set_title(caption)
# Collapse the colour channels into one grayscale plane as a weighted sum.
# https://stackoverflow.com/questions/12201577/how-can-i-convert-an-rgb-image-into-grayscale-in-python
channel_weights = [0.2989, 0.5870, 0.1140]
colour_planes = img_noise[..., :3]
img_grayscale = np.dot(colour_planes, channel_weights)
plt.imshow(img_grayscale)
from sklearn.feature_extraction import image
# Extract 25x25 patches from the grayscale image.
patches = image.extract_patches_2d(img_grayscale, (25, 25))

# Flatten each patch into one row. The row count and row length are taken
# from the patch array itself instead of being hard-coded (53824 x 625), so
# the result is identical for the current image and the reshape no longer
# fails if the input image has a different size.
patches = patches.reshape(patches.shape[0], -1)

# Fit a 100-component PCA on the flattened patches.
pca = PCA(n_components=100)
pca.fit(patches)
from plotnine import *
# Cumulative share of variance explained by the first k components, k = 1..n.
cumulative_ratio = pca.explained_variance_ratio_.cumsum()
component_index = np.arange(1, len(cumulative_ratio) + 1)
pca_cumvar = pd.DataFrame(data={'PC': component_index, 'CUMVAR': cumulative_ratio})

# Line chart of cumulative explained variance against component number.
ggplot(pca_cumvar) + geom_line(aes(x="PC", y="CUMVAR"), colour="r")
pca_cumvar.head()
# Project every patch onto the first three principal components.
pca_im = PCA(n_components=3)
principals = pca_im.fit_transform(patches)

# Draw each component's per-patch score as a 232x232 image
# (presumably 232 = 256 - 25 + 1 patch positions per side -- assumes a
# 256x256 input image; TODO confirm).
fig, ax = plt.subplots(1, 3, figsize=(25, 25))
for number, axis in enumerate(ax, start=1):
    im_comp = principals[:, number - 1].reshape(232, 232)
    axis.imshow(im_comp)
    axis.set_title("Component {}".format(number))

pca_im.explained_variance_ratio_
# Show each of the three fitted component vectors reshaped to a 25x25 image,
# the same size as the patches requested from extract_patches_2d.
fig, ax = plt.subplots(1, 3, figsize=(25, 25))
for number, axis in enumerate(ax, start=1):
    eigen = pca_im.components_[number - 1].reshape(25, 25)
    axis.imshow(eigen)
    axis.set_title("Eigenvector {}".format(number))